Background Subtraction

Trying to understand when an image has enough difference from background

In [39]:
import tables
import numpy as np
import h5py as h5
import matplotlib.pyplot as plt
import scipy
from scipy.interpolate import griddata
from sklearn.impute import SimpleImputer
import scipy.ndimage as ndi
from skimage.restoration import inpaint
from skimage.restoration import (denoise_tv_chambolle, denoise_bilateral,
                                 denoise_wavelet, estimate_sigma)
import cv2
from astropy.convolution import convolve, convolve_fft
import skimage.filters as filters
import skimage.segmentation as segmentation
import skimage.color as color
import skimage.morphology as morphology
import math

Auxiliary functions

interpolate frame

Interpolates a frame to remove bad points. Four methods available: imputer, inpaint, grid and convolve

In [2]:
def interpolate_frame(frame, max_height=4500, min_height=0, method="imput"):
    """Interpolate a frame to remove bad points (values outside
    the open interval (min_height, max_height)).

    Four methods are available: "imput", "inpaint", "grid" and "convolve".
    Any other method name returns the frame unchanged.

    Parameters
    ----------
    frame : 2-D ndarray of depth values (float; NaN is used internally).
    max_height : values >= this are treated as invalid (default 4500).
    min_height : values <= this are treated as invalid (default 0).
    method : which interpolation strategy to use.

    Returns
    -------
    2-D ndarray with the invalid points replaced (same shape as frame).
    """
    if(method == "imput"):
        # Mark invalid pixels as NaN, then fill each with its column mean.
        shape = frame.shape
        frame_flat = frame.flatten()  # flatten() copies; the input is untouched
        invalid_mins = np.argwhere(frame_flat <= min_height)
        invalid_maxs = np.argwhere(frame_flat >= max_height)
        frame_flat[invalid_mins] = np.nan
        frame_flat[invalid_maxs] = np.nan
        reshape_frame = np.reshape(frame_flat, shape)
        imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
        result = imputer.fit_transform(reshape_frame)
        return result

    if(method == "inpaint"):
        # Biharmonic inpainting over a mask of the invalid pixels.
        mask = np.zeros(frame.shape)
        mask[frame <= min_height] = 1
        mask[frame >= max_height] = 1
        result = inpaint.inpaint_biharmonic(frame, mask)
        return result

    if(method == "grid"):
        # Nearest-neighbour interpolation from the non-zero pixels.
        # NOTE: this branch only treats zero-valued pixels as invalid;
        # max_height is not applied here.
        points = np.nonzero(frame)
        values = frame[points]
        # Build the evaluation grid from the frame's own shape instead of
        # the previously hard-coded 720x1280 resolution, so the method
        # works for any frame size and evaluates exactly at pixel centres.
        grid_x, grid_y = np.mgrid[0:frame.shape[0], 0:frame.shape[1]]
        result = griddata(points, values, (grid_x, grid_y), method="nearest")
        return result

    if(method == "convolve"):
        # Replace invalid pixels with NaN and smooth them away with an FFT
        # convolution (astropy's convolve_fft interpolates across NaNs).
        shape = frame.shape
        frame_flat = frame.flatten()
        invalid_mins = np.argwhere(frame_flat <= min_height)
        invalid_maxs = np.argwhere(frame_flat >= max_height)
        frame_flat[invalid_mins] = np.nan
        frame_flat[invalid_maxs] = np.nan
        reshape_frame = np.reshape(frame_flat, shape)
        result = convolve_fft(reshape_frame, [[1,1,0,1,1],[1,1,0,1,1],[1,1,0,1,1]], boundary="wrap")
        return result

    # Unknown method: return the input untouched.
    return frame

frames median

Calculates the median frame from the given frame indexes and file.

In [3]:
def frames_median(file_name, indexes):
    """Calculate the pixel-wise median frame of the given frame indexes.

    Parameters
    ----------
    file_name : path to the HDF5 file holding datasets named "frame_<i>".
    indexes : iterable of frame indexes to include in the median.

    Returns
    -------
    2-D ndarray: element-wise median over the selected frames.
    """
    # Use a context manager so the file handle is released even on error
    # (the original version left the file open, leaking the handle).
    with h5.File(file_name, 'r') as file:
        # Materialize the datasets into memory while the file is open.
        frames = np.array([np.array(file.get("frame_" + str(f))) for f in indexes])
    return np.median(frames, axis=0)

remove artifacts

Removes artifacts from a previously subtracted image. Only allows "objects" bigger than a threshold value (default 6500)

In [337]:
def remove_artifacts(frame, min_size=6500, low_pass=1000):
    """Remove small artifacts from a previously background-subtracted frame.

    Builds a mask of "background" pixels (value < low_pass), fills in holes
    of that mask smaller than min_size, and zeroes the frame wherever the
    filled mask holds, so only connected regions larger than min_size survive.

    Parameters
    ----------
    frame : 2-D ndarray, absolute difference against the background.
    min_size : area threshold; smaller regions are wiped out (default 6500).
    low_pass : values below this are treated as background (default 1000).

    Returns
    -------
    Copy of frame with artifact pixels set to 0.
    """
    markers = np.zeros_like(frame)
    markers[frame < low_pass] = 1
    # `np.bool` was deprecated and removed in NumPy 1.24 -- use the
    # builtin `bool` dtype instead (same behaviour).
    bools = markers.astype(bool)
    mask_without_holes = morphology.remove_small_holes(bools, area_threshold=min_size, connectivity=2)
    subtracted = frame.copy()
    subtracted[mask_without_holes] = 0
    return subtracted

Get median from all frames with no passing objects

Some frames have bad readings and come out with weird values. Therefore we apply the median to get a cleaner image (see frames_median).

In [5]:
# Frame index ranges (inclusive) known to contain no passing objects;
# they are combined into a clean background estimate via frames_median.
stops = [(0, 23),
         (30, 66),
         (77, 83),
         (102, 114),
         (121, 125),
         (134, 136),
         (145, 208),
         (217, 255),
         (276, 288),
         (297, 307),
         (316, 332),
         (345, 359),
         (378, 399),
         (446, 449)]
# Expand each inclusive (first, last) pair into the full list of indexes.
indexes = np.concatenate([np.arange(first, last + 1) for first, last in stops])
median_frame = frames_median('/media/nas/PeopleDetection/up_realsense_frames/Data/Frames/Depth/5/28/12_57.h5', indexes)
In [6]:
# Open the recording and pull two reference frames: frame_0 is known to be
# corrupted ("bad"), frame_1 is clean ("good").
# NOTE(review): the handle deliberately stays open -- later cells keep
# reading frames from `file`.
file = h5.File('/media/nas/PeopleDetection/up_realsense_frames/Data/Frames/Depth/5/28/12_57.h5', 'r')
bad_frame = np.array(file.get("frame_0"),dtype=float)
good_frame = np.array(file.get("frame_1"), dtype=float)
# Matching RGB capture of the scene for visual comparison.
rgb_frame = cv2.imread("/media/nas/PeopleDetection/up_realsense_frames/Data/Frames/Color/5/28/12_57_1_c1.png")
In [324]:
%matplotlib notebook
# Compare the median background against one good and one bad raw frame.
plt.figure()

plt.subplot(221)
plt.imshow(median_frame, cmap="bone")
plt.title("median")

plt.subplot(222)
plt.imshow(bad_frame, cmap="bone")
plt.title("bad frame")

plt.subplot(223)
plt.imshow(good_frame, cmap="bone")
plt.title("good frame")

# Shared colorbar in a dedicated axis on the right-hand side.
plt.subplots_adjust(right=0.85, left=0.06)
cax = plt.axes([0.9, 0.1, 0.03, 0.8])
plt.colorbar(cax=cax)
plt.show()
In [327]:
%matplotlib notebook
# Side-by-side view: depth median background vs the RGB capture.
plt.figure()

plt.subplot(121)
plt.imshow(median_frame, cmap="bone")
plt.title("median")

plt.subplot(122)
plt.imshow(rgb_frame)
plt.title("rgb")

plt.subplots_adjust(right=1, left=0.06)
plt.show()

Interpolate error data from the median image

Error data is considered all data where values are either:

  • greater than camera height (4.5m)
  • less than floor (0m)

Many points and regions appear where, due to camera errors or light-reflection problems, the measured height is zero or negative, or is a value far above the camera's mounting height.

By interpolating we try to reduce these errors.

Here we compare 3 methods (see interpolate_frame):

The fastest is by far the imputer, followed by grid. The slowest is inpaint which takes about 20 seconds. All methods work well in this case but the imputer generates the image with the least amount of noise.

In [9]:
#%%time
# Column-mean imputation of the median background (fastest method, ~20 ms).
imputed_median = interpolate_frame(median_frame, method="imput")
CPU times: user 64.5 ms, sys: 0 ns, total: 64.5 ms
Wall time: 20.8 ms
In [10]:
#%%time
# Biharmonic inpainting of the median background (slowest method, ~20 s).
inpainted_median = interpolate_frame(median_frame, method="inpaint")
CPU times: user 46.1 s, sys: 655 ms, total: 46.8 s
Wall time: 21.9 s
In [11]:
#%%time
# Nearest-neighbour grid interpolation of the median background (~2 s).
grided_median = interpolate_frame(median_frame, method="grid")
CPU times: user 2.54 s, sys: 15.4 ms, total: 2.56 s
Wall time: 1.98 s
In [328]:
%matplotlib notebook
plt.figure()

plt.subplot(221)
plt.imshow(median_frame, cmap="bone")
plt.title("original")

plt.subplot(222)
plt.imshow(imputed_median, cmap="bone")
plt.title("imputed")

plt.subplot(223)
plt.imshow(inpainted_median, cmap="bone")
plt.title("inpainted")

plt.subplot(224)
plt.imshow(grided_median, cmap="bone")
plt.title("grided")



plt.show()

Analyse frame with an object (person)

Imputation generates an image with a lot of noise when not used in a median image. This happens because just one frame has much more noise than the median frame, and thus the column mean interpolation fills the gaps with inconsistent values.

Both the inpaint and grid are better options here. Their outputs are much more consistent with the outputs observed in the previous topic.

In [18]:
# Frame 70 contains a person; load the depth frame and its RGB counterpart.
person_frame = np.array(file.get("frame_70"), dtype=float)
person_frame_rgb = cv2.imread("/media/nas/PeopleDetection/up_realsense_frames/Data/Frames/Color/5/28/12_57_70_c1.png")
In [19]:
%matplotlib notebook
# Show the raw depth frame with a person next to the RGB image.
plt.figure()

plt.subplot(121)
plt.imshow(person_frame, cmap="bone")
plt.title("person")

plt.subplot(122)
plt.imshow(person_frame_rgb)
plt.title("rgb")

plt.subplots_adjust(right=1, left=0.06)
plt.show()
In [20]:
# Column-mean imputation on a single (noisy) frame.
imputed_person_frame = interpolate_frame(person_frame, method="imput")
In [21]:
# Nearest-neighbour grid interpolation on the same frame.
grided_person_frame = interpolate_frame(person_frame, method="grid")
In [22]:
# Biharmonic inpainting on the same frame (slowest of the three).
inpainted_person_frame = interpolate_frame(person_frame, method="inpaint")
In [23]:
%matplotlib notebook
# Compare the three interpolation methods on a single-frame (person) input.
plt.figure()

plt.subplot(221)
plt.imshow(person_frame, cmap="bone")
plt.title("original")

plt.subplot(222)
plt.imshow(inpainted_person_frame, cmap="bone")
plt.title("inpaint")

plt.subplot(223)
plt.imshow(grided_person_frame, cmap="bone")
plt.title("grid")

plt.subplot(224)
plt.imshow(imputed_person_frame, cmap="bone")
plt.title("imputed")

plt.subplots_adjust(right=1, left=0.06)
plt.show()

Subtract background from image

Given a corrected frame, subtract the corrected median background, in order to detect the presence of an object.

Subtracting the background will always leave some noise as the frames are not perfectly superimposed. The remaining artifacts have to be filtered out in order to isolate the objects in scene.

We use the frames corrected using the griddata method as it is fast and yields good cohesion between the points in the object.

In [329]:
%matplotlib notebook
# Grid-corrected background vs grid-corrected person frame, pre-subtraction.
plt.figure()

plt.subplot(121)
plt.imshow(grided_median, cmap="bone")
plt.title("empty")

plt.subplot(122)
plt.imshow(grided_person_frame, cmap="bone")
plt.title("person")

plt.show()
In [332]:
# Interpolate the two empty reference frames with griddata as well, so all
# inputs to the subtraction share the same correction method.
grided_empty_good_frame = interpolate_frame(good_frame, method="grid")
grided_empty_bad_frame = interpolate_frame(bad_frame, method="grid")

To get the difference between the median background and the other frames we use a simple absolute difference from openCV - absdiff

In [333]:
# Absolute difference against the median background (cv2.absdiff keeps the
# result non-negative, so operand order does not matter).
person_frame_dif = cv2.absdiff(grided_median, grided_person_frame)
empty_good_frame_dif = cv2.absdiff(grided_median, grided_empty_good_frame)
empty_bad_frame_dif = cv2.absdiff(grided_median, grided_empty_bad_frame)
In [334]:
%matplotlib notebook
# Raw subtraction results: note the residual artifacts even in empty frames.
plt.figure()
plt.subplot(221)
plt.imshow(empty_good_frame_dif, cmap="bone")
plt.title("empty (good)")


plt.subplot(222)
plt.imshow(empty_bad_frame_dif, cmap="bone")
plt.title("empty (bad)")

plt.subplot(223)
plt.imshow(person_frame_dif, cmap="bone")
plt.title("person")

plt.show()

The difference is not perfect and generates artifacts. These have to be removed in the following way (see remove_artifacts):

  • First create a mask with same shape as the frame.
  • Then set the mask to 1 on positions where the value is less than a low_pass.
  • Convert the mask to boolean values because the algorithm likes it more this way :)
  • Apply the scikit-image function remove_small_holes to the mask to remove the small artifacts from it.
  • Apply the mask to the frame.

When removing the holes, the value of min_size is derived from experimentation. It is the largest value that removes all small artifacts but is still small enough to maintain big artifacts, which will be assumed to be passing objects.

Both min_size and low_pass will have to be adjusted to find the best general values.

In [335]:
# Filter out subtraction artifacts using the default thresholds
# (min_size=6500, low_pass=1000).
person_subtracted = remove_artifacts(person_frame_dif)
empty_good_subtracted = remove_artifacts(empty_good_frame_dif)
empty_bad_subtracted = remove_artifacts(empty_bad_frame_dif)
In [336]:
%matplotlib notebook
# After artifact removal: only the person should remain; empty frames
# should be (close to) all-zero.
plt.figure()

plt.subplot(221)
plt.imshow(person_subtracted, cmap="bone")
plt.title("person")

plt.subplot(222)
plt.imshow(empty_good_subtracted, cmap="bone")
plt.title("empty(good)")

plt.subplot(223)
plt.imshow(empty_bad_subtracted, cmap="bone")
plt.title("empty(bad)")

plt.show()

The example works for proper frames but corrupted frames generate bad results.

How to remove these? Discard if max() is greater than a threshold?

Seems bad frames are always the first... Discard first X frames?

Experiments with more complex frames

Trying the method with some frames where more than one person can be found.

In [43]:
# Hand-picked frames that contain one or more people.
test_frame_indexes = [27, 260, 261, 294, 311, 339, 361]

test_frames = np.array([file.get("frame_"+str(i)) for i in test_frame_indexes], dtype=float)
In [185]:
# Number of subplot rows needed to fit all test frames in two columns.
plt_rows = math.ceil(len(test_frame_indexes)/2.)
In [184]:
%matplotlib notebook
plt.figure(figsize=(7,11))

for i in range(1, len(test_frame_indexes)+1):
    plt.subplot(plt_rows*100 + 2*10 + i)
    plt.imshow(test_frames[i-1], cmap="bone")
    plt.title(str(test_frame_indexes[i-1]))

plt.subplots_adjust(right=0.85, left=0.06)
cax = plt.axes([0.9, 0.1, 0.03, 0.8])
plt.colorbar(cax=cax)
plt.show()
In [82]:
# Vectorize interpolate_frame over a stack of frames; `method` is excluded
# so it is passed through unchanged (must be given as a keyword argument).
vec_interpolate_frame = np.vectorize(interpolate_frame, excluded=["method"], signature="(m,n)->(m,n)")
In [88]:
# Grid-interpolate the whole stack of test frames in one call.
interpolated_test_frames = vec_interpolate_frame(test_frames, method="grid")
In [338]:
%matplotlib notebook
plt.figure(figsize=(7,11))

for i in range(1, len(test_frame_indexes)+1):
    plt.subplot(plt_rows*100 + 2*10 + i)
    plt.imshow(interpolated_test_frames[i-1], cmap="bone")
    plt.title(str(test_frame_indexes[i-1]))

plt.show()
In [105]:
# Background-subtract every interpolated test frame against the median.
test_frames_dif = np.array(
    [cv2.absdiff(grided_median, fr) for fr in interpolated_test_frames]
)
In [339]:
%matplotlib notebook
plt.figure(figsize=(7,11))

for i in range(1, len(test_frame_indexes)+1):
    plt.subplot(plt_rows*100 + 2*10 + i)
    plt.imshow(test_frames_dif[i-1], cmap="bone")
    plt.title(str(test_frame_indexes[i-1]))

plt.show()
In [167]:
# Vectorize remove_artifacts over a stack of difference frames; thresholds
# are excluded so they pass through unchanged (keyword arguments only).
vec_remove_artifacts = np.vectorize(remove_artifacts, signature="(m,n)->(m,n)", excluded=["low_pass", "min_size"])
In [168]:
# Clean all difference frames with the default artifact thresholds.
test_frames_subtracted = vec_remove_artifacts(test_frames_dif)
In [340]:
%matplotlib notebook
plt.figure(figsize=(7,11))

for i in range(1, len(test_frame_indexes)+1):
    plt.subplot(plt_rows*100 + 2*10 + i)
    plt.imshow(test_frames_subtracted[i-1], cmap="bone")
    plt.title(str(test_frame_indexes[i-1]))

plt.show()

Function with whole algorithm

In [282]:
def has_object(background, frame, method="grid"):
    """Full detection pipeline for a single frame.

    Interpolates the frame, subtracts the corrected background and removes
    small artifacts; any pixel left non-zero is assumed to be an object.

    Parameters
    ----------
    background : corrected (interpolated) background frame.
    frame : raw frame to test.
    method : interpolation method forwarded to interpolate_frame.

    Returns
    -------
    Object ndarray [has_obj, subtracted], where has_obj is a bool and
    subtracted is the artifact-free difference image.
    """
    interpolated = interpolate_frame(frame, method=method)
    dif = cv2.absdiff(background, interpolated)
    subtracted = remove_artifacts(dif)
    has_obj = subtracted.max() > 0
    # dtype=object is required: NumPy >= 1.24 refuses to build a ragged
    # array (scalar bool + 2-D array) without it.
    return np.array([has_obj, subtracted], dtype=object)

Validation with random frames

Detecting the presence of objects in random selected frames from the file. We use both the imputer and griddata to detect the objects.

The imputer seems to be able to detect objects with the same accuracy, and takes less than a second for 30 frames. However the objects after subtraction seem much more distorted than when using griddata.

If the intent is to use the subtraction for something else, then grid should be used. If it is only for detecting presence, imputer is the better choice.

In [283]:
# Vectorize the full pipeline; the signature maps two 2-D inputs to a 1-D
# output (the length-2 [has_obj, subtracted] array per frame).
vec_has_object = np.vectorize(has_object, excluded=["background", "method"], signature="(m,n),(m,n)->(r)")
In [294]:
# Fix the seed so the sampled validation frames (and therefore the figures
# and timings below) are reproducible on re-run.
np.random.seed(42)
# 30 frame indexes drawn uniformly at random from the recording.
test_set = np.random.randint(0,450,30)
test_set_frames = np.array([file.get("frame_"+str(f)) for f in test_set], dtype=float)
test_set_frames_rgb = np.array([cv2.imread("/media/nas/PeopleDetection/up_realsense_frames/Data/Frames/Color/5/28/12_57_"+str(f)+"_c1.png") for f in test_set])
In [295]:
%%time
# Run the imputer-based pipeline on all 30 random frames (~0.8 s wall).
test_set_result_imputer = vec_has_object(imputed_median, test_set_frames, method="imput")
CPU times: user 3.09 s, sys: 35.8 ms, total: 3.12 s
Wall time: 831 ms
In [296]:
%%time
# Run the griddata-based pipeline on the same frames (~1 min wall).
test_set_result_grid = vec_has_object(grided_median, test_set_frames, method="grid")
CPU times: user 1min 17s, sys: 296 ms, total: 1min 17s
Wall time: 59.5 s
In [323]:
%matplotlib notebook
# Imputer pipeline results: subtracted image titled "<frame>: <detected?>".
fig = plt.figure(figsize=(7,16))

length = len(test_set)
# NOTE(review): rows is sized for 2 columns but the grid below uses 4,
# which leaves the bottom half of the figure empty -- ceil(length/4.)
# was probably intended; confirm before changing the layout.
rows = math.ceil(length/2.)

for i in range(1, length+1):
    plt.subplot(rows, 4, i)
    plt.imshow(test_set_result_imputer[i-1][1], cmap="bone")
    plt.title(str(test_set[i-1]) + ": " + str(test_set_result_imputer[i-1][0]))
    plt.axis("off")

plt.show()
fig.tight_layout()
In [322]:
%matplotlib notebook
# Griddata pipeline results: subtracted image titled "<frame>: <detected?>".
fig = plt.figure(figsize=(7,16))

length = len(test_set)
# NOTE(review): same 2-vs-4 column mismatch as the imputer figure above.
rows = math.ceil(length/2.)

for i in range(1, length+1):
    plt.subplot(rows, 4, i)
    plt.imshow(test_set_result_grid[i-1][1], cmap="bone")
    plt.title(str(test_set[i-1]) + ": " + str(test_set_result_grid[i-1][0]))
    plt.axis("off")

plt.show()

fig.tight_layout()
In [321]:
%matplotlib notebook
# Ground-truth check: RGB images titled with the grid pipeline's verdict.
fig = plt.figure(figsize=(7,16))

length = len(test_set)
# NOTE(review): same 2-vs-4 column mismatch as the figures above.
rows = math.ceil(length/2.)

for i in range(1, length+1):
    plt.subplot(rows, 4, i)
    plt.imshow(test_set_frames_rgb[i-1])
    plt.title(str(test_set[i-1]) + ": " + str(test_set_result_grid[i-1][0]))
    plt.axis("off")

plt.show()
fig.tight_layout()